knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
library(tidyverse)
library(ggplot2)
library(ggthemes)
library(kableExtra)
library(mgcv)
library(ggpmisc)
library(mgcViz)
library(viridis)

source('scripts/TSS_mclapply_ie_stn_month.R')

load.Rdata <- function( filename, objname )
{
  d1 <- load( filename )
  eval( parse( text=paste( objname,  "<<- ", d1  ) ) )
}

dat<- as.data.frame(read.delim("data/cal_vert_data_std_light_bloom_for_GAMs.txt"))

month_assign_cfin=data.frame(Month=1:12, TAXON="Cfin") %>% mutate(season=ifelse(Month >6| Month <3, "D", "A"))
month_assign_chyp=data.frame(Month=1:12, TAXON="Chyp") %>% mutate(season=ifelse(Month >3, "D", "A"))
month_assign_cglac=data.frame(Month=1:12, TAXON="Cglac") %>% mutate(season=ifelse(Month %in% c(4,5), "A", "D"))

month_assign<- rbind(month_assign_cfin,month_assign_chyp,month_assign_cglac)



dat<- left_join(dat %>%  mutate(fMonth=as.factor(Month),
                                             cum_n_m2 = ind.m2_total * pcum), month_assign) # for simulation



theme_cl<- theme_few()+theme(axis.title.x = element_text(size=14, colour="black"),
                             axis.text.x = element_text(size=13, colour='black'),
                             axis.title.y = element_text(size=14, colour="black"),
                             axis.text.y = element_text(size=13, colour='black'),
                             legend.title=element_blank(),
                             panel.border = element_rect(colour = "black"),
                             axis.ticks = element_line(colour="black"),
                             strip.text.x = element_text(size=14, face="bold"),
                             strip.text.y = element_text(size=14, face="bold")) 

reducing eps in gam formula with betar distribution has a positive effect on the normality of random effects.

What we need to know about the data from the EcoMon survey

criteria for correction: 1) % of depth sampled < 95% (tow_depth/sta_depth) 2) sta_depth-tow_depth > 10. if sta_depth-tow_depth < 10, then considered as sampled properly.

#to be replaced by Ben package?
library(tidyxl)
ecomon<- readxl::read_excel(sheet="Data","C:/LEHOUX/Projects/Données_copépodes/ACCASP2020/Calfin_Thru_12_30_2019_10m2.xlsx")%>%  mutate(perc_sampled= tow_depth/sta_depth *100,
                                                       to_correct=ifelse(perc_sampled < 95 & (sta_depth-tow_depth) >10, "Y", "N"),
                   Zcat = as.factor(ifelse(sta_depth < 200, "0-200",
                                           ifelse(sta_depth >=200 & sta_depth < 300, "200-300",
                                                  ifelse(sta_depth >=300 & sta_depth < 500, "300-500",
                                                         ifelse(sta_depth>=500 & sta_depth < 1000, "500-1000", 
                                                             ifelse(sta_depth>=1000, "1000+" , NA)))))))


ecomon %>%  group_by(to_correct,Zcat) %>%  tally()
## # A tibble: 8 × 3
## # Groups:   to_correct [2]
##   to_correct Zcat         n
##   <chr>      <fct>    <int>
## 1 N          0-200    20502
## 2 N          200-300    627
## 3 N          300-500      3
## 4 Y          0-200     1403
## 5 Y          1000+        5
## 6 Y          200-300   1563
## 7 Y          300-500    269
## 8 Y          500-1000    15

Cfin CVI

load("gamms/Cfin_CVIgamms3_4_remove01.RData"); dattax_month <- dattax %>%  mutate(fMonth=as.factor(Month))
load("gamms/Cfin_CVIgammsseason_remove01.RData"); dattax_season <- dattax %>%  mutate(fMonth=as.factor(Month))

Table

AIC(mod3,mod4, modseason)
##                 df        AIC
## mod3      73.65314  -980.0647
## mod4      47.62006 -2209.9254
## modseason 60.45941  -950.3503
#doesn't work don't know why?? all factor levels are there. doesn't even wirk with predict dataC 
#tss_tablemod3<- TSS_mclapply_ie_stn(mcomp=mod3, Data=dattax_month, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
#tss_tablemod4<- TSS_mclapply_ie_stn(mcomp=mod4, Data=dattax_month, gam_type="standard", fam="betar(eps=0.0000001)", nrem=4,Sp="Cfin_CVI")  
#tss_tablemodseason<- TSS_mclapply_ie_stn(mcomp=modseason, Data=dattax_season, gam_type="standard", fam="betar(eps=0.0000001)", nrem=4,Sp="Cfin_CVI")  
#tss_table<- rbind(tss_tablemod3,tss_tablemod4,tss_tablemodseason)

#  kbl(tss_table) %>%
#  kable_paper(full_width = F) 

Profiles

Month as factor

month as factor ### Month as continuous month as continuous ### Season month as season

Residuals patterns

Month as factor

dattax_month <-  dattax_month %>%  mutate(residuals=residuals(mod3),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_month, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

 dattax_month$linpred<- as.vector(predict(mod3, type="link"))

ggplot(dattax_month, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=predict(mod3, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Month as continuous

dattax_month <-  dattax_month %>%  mutate(residuals=residuals(mod4),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_month, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

 dattax_month$linpred<- as.vector(predict(mod4, type="link"))

ggplot(dattax_month, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=predict(mod4, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Season

dattax_season <-  dattax_season %>%  mutate(residuals=residuals(modseason),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_season, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

dattax_season$linpred<- as.vector(predict(mod4, type="link"))

ggplot(dattax_season, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_season, aes(y=predict(modseason, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Vis.gam

Month as factor

print(plot(getViz(mod3), all.terms=T, select=ncol(predict(mod3, type="terms"))))

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="3"), main="Month3")

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="8"), main="Month8")

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="10"), main="Month10")

print(plot(getViz(mod3), all.terms=T), pages=1)

new_data <- tidyr::expand(dattax_month, nesting(ID,fMonth),
                          Zstation = seq(50,500,10),
                          percZ_stn=0.5)
m1_pred <- bind_cols(new_data,
                     as.data.frame(predict(mod3, newdata = new_data,
                                           se.fit = TRUE)))

ggplot(m1_pred, aes(x = Zstation, y = fit, group = ID,
                    colour = fMonth)) +
    geom_line() + theme_cl+
    facet_wrap(~ fMonth) 

#re<- as.data.frame(predict(mod3, type="terms")) %>%  select(`s(ID)`)
#dattax_month$re<- re$`s(ID)`
#dattax_month$residuals<-residuals(mod3)#

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=ind.m2_total))+ scale_color_viridis(option="turbo",trans="log10") +theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=LONGITUDE)) +
#scale_color_viridis(option="turbo")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=Zstation))+ scale_color_viridis(option="turbo",trans="log10")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=Year))+ 
#scale_color_viridis(option="turbo")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=fMonth)) +
#scale_color_viridis_d(option="turbo")+theme_cl

Month as continuous

print(plot(getViz(mod4), all.terms=T, select=ncol(predict(mod4, type="terms"))))

season

Season

print(plot(getViz(modseason), all.terms=T, select=ncol(predict(modseason, type="terms"))))

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="D"), main="Diapause")

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="A"), main="Active")

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="A"), main="Active")

print(plot(getViz(modseason), all.terms=T), pages=1)

Simulation of bioness data

datsp <-  dat %>%  filter(TAXON_STADE=="Cfin_CVI")

Month as factor

dat200 <-  datsp %>%  dplyr::filter(ZMax < 200 & Zstation > 200) %>% dplyr::group_by(ID) %>%   dplyr::slice(which.max(ZMax)) 

dat_zstation <-  datsp %>% dplyr::group_by(ID) %>%   slice(which.max(ZMax))

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(mod3, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Month as continuous

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(mod4, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Season

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(modseason, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Cfin CV

load("gamms/Cfin_CVgamms3_4_remove01.RData"); dattax_month <- dattax %>%  mutate(fMonth=as.factor(Month))
load("gamms/Cfin_CVgammsseason_remove01.RData"); dattax_season <- dattax %>%  mutate(fMonth=as.factor(Month))

Table

AIC(mod3,mod4, modseason)
##                 df        AIC
## mod3      237.2257  -4282.196
## mod4      149.0354 -11564.548
## modseason 157.3500  -4231.050
#doesn't work don't know why?? all factor levels are there. doesn't even wirk with predict dataC 
#tss_tablemod3<- TSS_mclapply_ie_stn(mcomp=mod3, Data=dattax_month, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
#tss_tablemod4<- TSS_mclapply_ie_stn(mcomp=mod4, Data=dattax_month, gam_type="standard", fam="betar(eps=0.0000001)", nrem=4,Sp="Cfin_CVI")  
#tss_tablemodseason<- TSS_mclapply_ie_stn(mcomp=modseason, Data=dattax_season, gam_type="standard", fam="betar(eps=0.0000001)", nrem=4,Sp="Cfin_CVI")  
#tss_table<- rbind(tss_tablemod3,tss_tablemod4,tss_tablemodseason)

#  kbl(tss_table) %>%
#  kable_paper(full_width = F) 

Profiles

Month as factor

month as factor ### Month as continuous month as continuous ### Season month as season

Residuals patterns

Month as factor

dattax_month <-  dattax_month %>%  mutate(residuals=residuals(mod3),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_month, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

 dattax_month$linpred<- as.vector(predict(mod3, type="link"))

ggplot(dattax_month, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=predict(mod3, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Month as continuous

dattax_month <-  dattax_month %>%  mutate(residuals=residuals(mod4),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_month, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

 dattax_month$linpred<- as.vector(predict(mod4, type="link"))

ggplot(dattax_month, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=predict(mod4, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Season

dattax_season <-  dattax_season %>%  mutate(residuals=residuals(modseason),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_season, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

dattax_season$linpred<- as.vector(predict(mod4, type="link"))

ggplot(dattax_season, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_season, aes(y=predict(modseason, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Vis.gam

Month as factor

print(plot(getViz(mod3), all.terms=T, select=ncol(predict(mod3, type="terms"))))

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="3"), main="Month3")

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="8"), main="Month8")

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="10"), main="Month10")

print(plot(getViz(mod3), all.terms=T), pages=1)

new_data <- tidyr::expand(dattax_month, nesting(ID,fMonth),
                          Zstation = seq(50,500,10),
                          percZ_stn=0.5)
m1_pred <- bind_cols(new_data,
                     as.data.frame(predict(mod3, newdata = new_data,
                                           se.fit = TRUE)))

ggplot(m1_pred, aes(x = Zstation, y = fit, group = ID,
                    colour = fMonth)) +
    geom_line() + theme_cl+
    facet_wrap(~ fMonth) 

#re<- as.data.frame(predict(mod3, type="terms")) %>%  select(`s(ID)`)
#dattax_month$re<- re$`s(ID)`
#dattax_month$residuals<-residuals(mod3)#

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=ind.m2_total))+ scale_color_viridis(option="turbo",trans="log10") +theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=LONGITUDE)) +
#scale_color_viridis(option="turbo")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=Zstation))+ scale_color_viridis(option="turbo",trans="log10")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=Year))+ 
#scale_color_viridis(option="turbo")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=fMonth)) +
#scale_color_viridis_d(option="turbo")+theme_cl

Month as continuous

print(plot(getViz(mod4), all.terms=T, select=ncol(predict(mod4, type="terms"))))

season

Season

print(plot(getViz(modseason), all.terms=T, select=ncol(predict(modseason, type="terms"))))

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="D"), main="Diapause")

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="A"), main="Active")

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="A"), main="Active")

print(plot(getViz(modseason), all.terms=T), pages=1)

Simulation of bioness data

datsp <-  dat %>%  filter(TAXON_STADE=="Cfin_CV")

Month as factor

dat200 <-  datsp %>%  dplyr::filter(ZMax < 200 & Zstation > 200) %>% dplyr::group_by(ID) %>%   dplyr::slice(which.max(ZMax)) 

dat_zstation <-  datsp %>% dplyr::group_by(ID) %>%   slice(which.max(ZMax))

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(mod3, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Month as continuous

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(mod4, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Season

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(modseason, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Cfin CIV

load("gamms/Cfin_CIVgamms3_4_remove01.RData"); dattax_month <- dattax %>%  mutate(fMonth=as.factor(Month))
load("gamms/Cfin_CIVgammsseason_remove01.RData"); dattax_season <- dattax %>%  mutate(fMonth=as.factor(Month))

Table

AIC(mod3,mod4, modseason)
##                 df        AIC
## mod3      185.3930  -3763.047
## mod4      118.6907 -10792.087
## modseason 126.7773  -3780.707
#doesn't work don't know why?? all factor levels are there. doesn't even wirk with predict dataC 
#tss_tablemod3<- TSS_mclapply_ie_stn(mcomp=mod3, Data=dattax_month, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
#tss_tablemod4<- TSS_mclapply_ie_stn(mcomp=mod4, Data=dattax_month, gam_type="standard", fam="betar(eps=0.0000001)", nrem=4,Sp="Cfin_CVI")  
#tss_tablemodseason<- TSS_mclapply_ie_stn(mcomp=modseason, Data=dattax_season, gam_type="standard", fam="betar(eps=0.0000001)", nrem=4,Sp="Cfin_CVI")  
#tss_table<- rbind(tss_tablemod3,tss_tablemod4,tss_tablemodseason)

#  kbl(tss_table) %>%
#  kable_paper(full_width = F) 

Profiles

Month as factor

month as factor ### Month as continuous month as continuous ### Season month as season

Residuals patterns

Month as factor

dattax_month <-  dattax_month %>%  mutate(residuals=residuals(mod3),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_month, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

 dattax_month$linpred<- as.vector(predict(mod3, type="link"))

ggplot(dattax_month, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=predict(mod3, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Month as continuous

dattax_month <-  dattax_month %>%  mutate(residuals=residuals(mod4),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_month, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

 dattax_month$linpred<- as.vector(predict(mod4, type="link"))

ggplot(dattax_month, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=predict(mod4, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Season

dattax_season <-  dattax_season %>%  mutate(residuals=residuals(modseason),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_season, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

dattax_season$linpred<- as.vector(predict(mod4, type="link"))

ggplot(dattax_season, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_season, aes(y=predict(modseason, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Vis.gam

Month as factor

print(plot(getViz(mod3), all.terms=T, select=ncol(predict(mod3, type="terms"))))

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="3"), main="Month3")

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="8"), main="Month8")

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="10"), main="Month10")

print(plot(getViz(mod3), all.terms=T), pages=1)

new_data <- tidyr::expand(dattax_month, nesting(ID,fMonth),
                          Zstation = seq(50,500,10),
                          percZ_stn=0.5)
m1_pred <- bind_cols(new_data,
                     as.data.frame(predict(mod3, newdata = new_data,
                                           se.fit = TRUE)))

ggplot(m1_pred, aes(x = Zstation, y = fit, group = ID,
                    colour = fMonth)) +
    geom_line() + theme_cl+
    facet_wrap(~ fMonth) 

#re<- as.data.frame(predict(mod3, type="terms")) %>%  select(`s(ID)`)
#dattax_month$re<- re$`s(ID)`
#dattax_month$residuals<-residuals(mod3)#

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=ind.m2_total))+ scale_color_viridis(option="turbo",trans="log10") +theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=LONGITUDE)) +
#scale_color_viridis(option="turbo")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=Zstation))+ scale_color_viridis(option="turbo",trans="log10")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=Year))+ 
#scale_color_viridis(option="turbo")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=fMonth)) +
#scale_color_viridis_d(option="turbo")+theme_cl

Month as continuous

print(plot(getViz(mod4), all.terms=T, select=ncol(predict(mod4, type="terms"))))

season

Season

print(plot(getViz(modseason), all.terms=T, select=ncol(predict(modseason, type="terms"))))

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="D"), main="Diapause")

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="A"), main="Active")

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="A"), main="Active")

print(plot(getViz(modseason), all.terms=T), pages=1)

Simulation of bioness data

datsp <-  dat %>%  filter(TAXON_STADE=="Cfin_CIV")

Month as factor

dat200 <-  datsp %>%  dplyr::filter(ZMax < 200 & Zstation > 200) %>% dplyr::group_by(ID) %>%   dplyr::slice(which.max(ZMax)) 

dat_zstation <-  datsp %>% dplyr::group_by(ID) %>%   slice(which.max(ZMax))

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(mod3, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Month as continuous

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(mod4, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Season

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(modseason, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Cfin CIV-CVI

load("gamms/Cfin_CIV_CVIgamms3_4_remove01.RData"); dattax_month <- dattax %>%  mutate(fMonth=as.factor(Month))
load("gamms/Cfin_CIV_CVIgammsseason_remove01.RData"); dattax_season <- dattax %>%  mutate(fMonth=as.factor(Month))

Table

AIC(mod3,mod4, modseason)
##                 df        AIC
## mod3      276.4344  -4488.609
## mod4      153.7038 -10592.331
## modseason 194.2298  -4322.528
#doesn't work don't know why?? all factor levels are there. doesn't even wirk with predict dataC 
#tss_tablemod3<- TSS_mclapply_ie_stn(mcomp=mod3, Data=dattax_month, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
#tss_tablemod4<- TSS_mclapply_ie_stn(mcomp=mod4, Data=dattax_month, gam_type="standard", fam="betar(eps=0.0000001)", nrem=4,Sp="Cfin_CVI")  
#tss_tablemodseason<- TSS_mclapply_ie_stn(mcomp=modseason, Data=dattax_season, gam_type="standard", fam="betar(eps=0.0000001)", nrem=4,Sp="Cfin_CVI")  
#tss_table<- rbind(tss_tablemod3,tss_tablemod4,tss_tablemodseason)

#  kbl(tss_table) %>%
#  kable_paper(full_width = F) 
tss_tablemod3<- TSS_mclapply_ie_stn(mcomp=mod3, Data=dattax_month, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
tss_tablemod4<- TSS_mclapply_ie_stn(mcomp=mod4, Data=dattax_month, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
tss_tablemodseason<- TSS_mclapply_ie_stn(mcomp=modseason, Data=dattax_season, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
tss_table<- rbind(tss_tablemod3,tss_tablemod4,tss_tablemodseason)

  kbl(tss_table) %>%
  kable_paper(full_width = F) 
ObsPrev_abund Tresh_opt_e_i TSS_e_i TSSsd_e_i AUC_e_i corr_r_e_i p.corr_e_i intercept_e_i slope_e_i r2_e_i r2sd_e_i
0.65 NA/NA NA/NA NA/NA NA/NA 0.83/0.89 0/0 0.21/0.16 0.7/0.73 0.65/0.76 0.05/0
0.65 NA/NA NA/NA NA/NA NA/NA 0.87/0.86 0/0 0.26/0.2 0.66/0.67 0.74/0.7 0.07/0
0.65 NA/NA NA/NA NA/NA NA/NA 0.84/0.86 0/0 0.25/0.21 0.63/0.64 0.64/0.69 0.03/0

Profiles

Month as factor

month as factor ### Month as continuous month as continuous ### Season month as season

Residuals patterns

Month as factor

dattax_month <-  dattax_month %>%  mutate(residuals=residuals(mod3),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_month, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

 dattax_month$linpred<- as.vector(predict(mod3, type="link"))

ggplot(dattax_month, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=predict(mod3, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Month as continuous

dattax_month <-  dattax_month %>%  mutate(residuals=residuals(mod4),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_month, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

 dattax_month$linpred<- as.vector(predict(mod4, type="link"))

ggplot(dattax_month, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=predict(mod4, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Season

dattax_season <-  dattax_season %>%  mutate(residuals=residuals(modseason),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_season, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

dattax_season$linpred<- as.vector(predict(mod4, type="link"))

ggplot(dattax_season, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_season, aes(y=predict(modseason, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Vis.gam

Month as factor

print(plot(getViz(mod3), all.terms=T, select=ncol(predict(mod3, type="terms"))))

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="3"), main="Month3")

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="8"), main="Month8")

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="10"), main="Month10")

print(plot(getViz(mod3), all.terms=T), pages=1)

new_data <- tidyr::expand(dattax_month, nesting(ID,fMonth),
                          Zstation = seq(50,500,10),
                          percZ_stn=0.5)
m1_pred <- bind_cols(new_data,
                     as.data.frame(predict(mod3, newdata = new_data,
                                           se.fit = TRUE)))

ggplot(m1_pred, aes(x = Zstation, y = fit, group = ID,
                    colour = fMonth)) +
    geom_line() + theme_cl+
    facet_wrap(~ fMonth) 

#re<- as.data.frame(predict(mod3, type="terms")) %>%  select(`s(ID)`)
#dattax_month$re<- re$`s(ID)`
#dattax_month$residuals<-residuals(mod3)#

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=ind.m2_total))+ scale_color_viridis(option="turbo",trans="log10") +theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=LONGITUDE)) +
#scale_color_viridis(option="turbo")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=Zstation))+ scale_color_viridis(option="turbo",trans="log10")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=Year))+ 
#scale_color_viridis(option="turbo")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=fMonth)) +
#scale_color_viridis_d(option="turbo")+theme_cl

Month as continuous

print(plot(getViz(mod4), all.terms=T, select=ncol(predict(mod4, type="terms"))))

season

Season

print(plot(getViz(modseason), all.terms=T, select=ncol(predict(modseason, type="terms"))))

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="D"), main="Diapause")

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="A"), main="Active")

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="A"), main="Active")

print(plot(getViz(modseason), all.terms=T), pages=1)

Simulation of bioness data

datsp <-  dat %>%  filter(TAXON_STADE=="Cfin_CIV_CVI")

Month as factor

dat200 <-  datsp %>%  dplyr::filter(ZMax < 200 & Zstation > 200) %>% dplyr::group_by(ID) %>%   dplyr::slice(which.max(ZMax)) 

dat_zstation <-  datsp %>% dplyr::group_by(ID) %>%   slice(which.max(ZMax))

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(mod3, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Month as continuous

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(mod4, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Season

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(modseason, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Cfin CV-CVI

load("gamms/Cfin_CV_CVIgamms3_4_remove01.RData"); dattax_month <- dattax %>%  mutate(fMonth=as.factor(Month))
load("gamms/Cfin_CV_CVIgammsseason_remove01.RData"); dattax_season <- dattax %>%  mutate(fMonth=as.factor(Month))

Table

AIC(mod3,mod4, modseason)
##                 df        AIC
## mod3      278.2126  -4480.242
## mod4      153.4178 -10597.444
## modseason 194.1278  -4329.660
#doesn't work don't know why?? all factor levels are there. doesn't even wirk with predict dataC 
#tss_tablemod3<- TSS_mclapply_ie_stn(mcomp=mod3, Data=dattax_month, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
#tss_tablemod4<- TSS_mclapply_ie_stn(mcomp=mod4, Data=dattax_month, gam_type="standard", fam="betar(eps=0.0000001)", nrem=4,Sp="Cfin_CVI")  
#tss_tablemodseason<- TSS_mclapply_ie_stn(mcomp=modseason, Data=dattax_season, gam_type="standard", fam="betar(eps=0.0000001)", nrem=4,Sp="Cfin_CVI")  
#tss_table<- rbind(tss_tablemod3,tss_tablemod4,tss_tablemodseason)

#  kbl(tss_table) %>%
#  kable_paper(full_width = F) 
tss_tablemod3<- TSS_mclapply_ie_stn(mcomp=mod3, Data=dattax_month, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
tss_tablemod4<- TSS_mclapply_ie_stn(mcomp=mod4, Data=dattax_month, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
tss_tablemodseason<- TSS_mclapply_ie_stn(mcomp=modseason, Data=dattax_season, gam_type="standard", fam="betar(eps=0.001)", nrem=4,Sp="pcum")  
tss_table<- rbind(tss_tablemod3,tss_tablemod4,tss_tablemodseason)

  kbl(tss_table) %>%
  kable_paper(full_width = F) 
ObsPrev_abund Tresh_opt_e_i TSS_e_i TSSsd_e_i AUC_e_i corr_r_e_i p.corr_e_i intercept_e_i slope_e_i r2_e_i r2sd_e_i
0.65 NA/NA NA/NA NA/NA NA/NA 0.83/0.89 0/0 0.2/0.16 0.71/0.73 0.66/0.76 0.05/0
0.65 NA/NA NA/NA NA/NA NA/NA 0.86/0.86 0/0 0.25/0.2 0.66/0.67 0.75/0.7 0.08/0
0.65 NA/NA NA/NA NA/NA NA/NA 0.85/0.86 0/0 0.24/0.21 0.64/0.64 0.65/0.7 0.03/0

Profiles

Month as factor

month as factor ### Month as continuous month as continuous ### Season month as season

Residuals patterns

Month as factor

dattax_month <-  dattax_month %>%  mutate(residuals=residuals(mod3),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_month, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

 dattax_month$linpred<- as.vector(predict(mod3, type="link"))

ggplot(dattax_month, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=predict(mod3, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Month as continuous

dattax_month <-  dattax_month %>%  mutate(residuals=residuals(mod4),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_month, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point()+
  geom_smooth()

 dattax_month$linpred<- as.vector(predict(mod4, type="link"))

ggplot(dattax_month, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_month, aes(y=predict(mod4, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Season

dattax_season <-  dattax_season %>%  mutate(residuals=residuals(modseason),
                                          cat_perc= ifelse(percZ_stn < 0.25, "<25",
                                                           ifelse(percZ_stn >=0.25 & percZ_stn < 0.50, "25-50",
                                                                  ifelse(percZ_stn >= 0.50 & percZ_stn < 0.75, "50-75", ">75"))))


ggplot(dattax_season, aes(y=residuals, x=Month))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=LONGITUDE))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=Year))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

ggplot(dattax_season, aes(y=residuals, x=Zstation))+ facet_wrap(~cat_perc)+
  geom_point(aes(col=season))+
  geom_smooth()

dattax_season$linpred<- as.vector(predict(mod4, type="link"))

ggplot(dattax_season, aes(y=residuals, x=linpred))+ ggtitle("Homogeneity of residuals")+
  geom_point()+
  geom_smooth()

ggplot(dattax_season, aes(y=predict(modseason, type="response"), x=pcum))+ ggtitle("Prediction accuracy")+
  geom_point()+
  geom_smooth()

Vis.gam

Month as factor

print(plot(getViz(mod3), all.terms=T, select=ncol(predict(mod3, type="terms"))))

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="3"), main="Month3")

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="8"), main="Month8")

vis.gam(mod3, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(fMonth="10"), main="Month10")

print(plot(getViz(mod3), all.terms=T), pages=1)

new_data <- tidyr::expand(dattax_month, nesting(ID,fMonth),
                          Zstation = seq(50,500,10),
                          percZ_stn=0.5)
m1_pred <- bind_cols(new_data,
                     as.data.frame(predict(mod3, newdata = new_data,
                                           se.fit = TRUE)))

ggplot(m1_pred, aes(x = Zstation, y = fit, group = ID,
                    colour = fMonth)) +
    geom_line() + theme_cl+
    facet_wrap(~ fMonth) 

#re<- as.data.frame(predict(mod3, type="terms")) %>%  select(`s(ID)`)
#dattax_month$re<- re$`s(ID)`
#dattax_month$residuals<-residuals(mod3)#

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=ind.m2_total))+ scale_color_viridis(option="turbo",trans="log10") +theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=LONGITUDE)) +
#scale_color_viridis(option="turbo")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=Zstation))+ scale_color_viridis(option="turbo",trans="log10")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=Year))+ 
#scale_color_viridis(option="turbo")+theme_cl

#ggplot(dattax_month, aes(x=residuals, y=re))+geom_point(aes(col=fMonth)) +
#scale_color_viridis_d(option="turbo")+theme_cl

Month as continuous

print(plot(getViz(mod4), all.terms=T, select=ncol(predict(mod4, type="terms"))))

season

Season

print(plot(getViz(modseason), all.terms=T, select=ncol(predict(modseason, type="terms"))))

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="D"), main="Diapause")

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="A"), main="Active")

vis.gam(modseason, view=c("Zstation", "percZ_stn"),plot.type="contour", type="response", color="gray",cond=list(season="A"), main="Active")

print(plot(getViz(modseason), all.terms=T), pages=1)

Simulation of bioness data

datsp <-  dat %>%  filter(TAXON_STADE=="Cfin_CV_CVI")

Month as factor

dat200 <-  datsp %>%  dplyr::filter(ZMax < 200 & Zstation > 200) %>% dplyr::group_by(ID) %>%   dplyr::slice(which.max(ZMax)) 

dat_zstation <-  datsp %>% dplyr::group_by(ID) %>%   slice(which.max(ZMax))

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(mod3, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Month as continuous

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(mod4, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance") 

Season

dat200$corrected_bioness =dat200$cum_n_m2/as.vector(predict(modseason, dat200, type="response", exclude ='s(ID)', newdata.guaranteed = T))
dat_corr<- left_join(dat_zstation, dat200[,c("ID", "corrected_bioness")])

dat_corr <-  dat_corr %>%  mutate(Zcat = as.factor(ifelse(Zstation < 300, "< 300",
                                                  ifelse(Zstation >=300 & Zstation < 500, "300-500",
                                                         ifelse(Zstation>=500 & Zstation < 1000, "500-1000", 
                                                             ifelse(Zstation>=1000, ">1000" , NA))))) , Zcat=recode_factor(Zcat, `<300` ="<300", `300-500`="300-500",`500-1000`="500-1000",`>1000`=">1000"))

ggplot(data=dat_corr, aes(y=corrected_bioness, x=cum_n_m2))+
  geom_point(aes(col=Zcat))+ #montre pas de lien avec factor year
  geom_smooth(method="lm", se=F)+
  geom_abline(slope=1, intercept=0)+
  theme_few()+
  stat_poly_eq(formula = y~x,
                eq.with.lhs = "italic(hat(y))~`=`~",
                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                parse = TRUE) +   
  scale_y_continuous(trans="log10", name="Simulated corrected integrated abundance")+
  scale_x_continuous(trans="log10", name="Observed intergrated abundance")